Developer CD Series 1999 January: Mac OS SDK

home *** CD-ROM | disk | FTP | other *** search

/ Developer CD Series 1999 January: Mac OS SDK / Dev.CD Jan 99 SDK2.toast / Development Kits / TEC 1.4 / SampleCode / UnicodeHub / Convert.cp next >

Wrap

Text File | 1998-09-25 | 39.0 KB | 1,135 lines | [TEXT/CWIE]

/*
	File:		Convert.cp

	Contains:

	Version:	System 8

	Copyright:	© 1997-1998 by Apple Computer, Inc., all rights reserved.

	File Ownership:

		DRI:				Julio Gonzalez

		Other Contact:		Andrew Daniels

		Technology:			International

	Writers:

		(jag)	Julio Gonzalez

	Change History (most recent first):

	  <TEC1>	 6/16/98	jag		first checked in
	  <TEC0>	 6/16/98	jag		Moved to BBS (new change numbers!). Old history in NRBuild:
		 <3>	 8/22/97	jag		Fix yet another bug in ResolveConversionParams. Also better
									usage of TECFlush.
		 <2>	 8/20/97	jag		Fix bug in ResolveConversionParams and set output handle
									length appropriately in the Catch segment of DoConvert.
*/

#include "UnicodeHub.h"
#include "UnicodeHubConstants.h"

#include <Sound.h>
#include <Script.h>		// Script Manager definitions

#include <LGrowZone.h>
#include <LWindow.h>
#include <PP_Messages.h>
#include <PP_Resources.h>
#include <PPobClasses.h>
#include <UDrawingState.h>
#include <UMemoryMgr.h>
#include <UReanimator.h>
#include <UDesktop.h>
#include <URegistrar.h>
#include <LEditField.h>
#include <LTextEdit.h>
#include <UTextTraits.h>

#include "LDynamicArray.h"

/**********************************************************************************************************
 * TEC SPECIFIC CODE COMMENT
 *
 * Whether you use the Text Encoding Converter or the Unicode Converter, give the output buffer at least
 * 32 bytes. A single element of one encoding can expand to as many as 32 bytes in the other encoding;
 * 32 bytes is currently the most that TEC will produce for a single element.
 **********************************************************************************************************/
#define kMinimumBufferSize 32

/**********************************************************************************************************
 * TEC SPECIFIC CODE COMMENT
 *
 * NeedsTag builds one of the language (CJK) character tags that we have placed in Apple's corporate zone.
 * The tag is used when converting Styled Text to Unicode so that the conversion can round trip: when the
 * run about to be converted is Japanese, Chinese (Traditional or Simplified) or Korean, the matching tag
 * is inserted in the Unicode stream. There are tags for both UTF-16 and UTF-8.
 *
 * The tag is returned as a Pascal-style string: tag[0] is the byte count (0 when no tag is needed, 3 for
 * UTF-8, 2 for UTF-16) and the tag bytes follow. The caller must supply at least 4 bytes.
 **********************************************************************************************************/
static void NeedsTag( StringPtr tag, TextEncoding theEncoding, TextEncoding unicodeEncoding )
{
	const TextEncodingBase	scriptBase = ::GetTextEncodingBase( theEncoding );
	const Boolean			wantUTF8 = ( ::GetTextEncodingFormat( unicodeEncoding ) == kUnicodeUTF8Format );
	unsigned char			languageNibble;

	// The four tags differ only in the low nibble of their last byte.
	if( scriptBase == kTextEncodingMacJapanese )
		languageNibble = 0x0E;
	else if( scriptBase == kTextEncodingMacChineseTrad )
		languageNibble = 0x0D;
	else if( scriptBase == kTextEncodingMacKorean )
		languageNibble = 0x0F;
	else if( scriptBase == kTextEncodingMacChineseSimp )
		languageNibble = 0x0C;
	else
		languageNibble = 0;			// not a tagged script

	tag[0] = 0;						// assume "no tag" until proven otherwise

	if( wantUTF8 )
	{
		// UTF-8 tags: EF A1 9x
		tag[3] = ( languageNibble != 0 ) ? (unsigned char)( 0x90 | languageNibble ) : (unsigned char)0;
		if( tag[3] != 0 )
		{
			tag[0] = 3;
			tag[1] = 0xEF;
			tag[2] = 0xA1;
		}
	}
	else
	{
		// UTF-16 tags: F8 5x
		tag[2] = ( languageNibble != 0 ) ? (unsigned char)( 0x50 | languageNibble ) : (unsigned char)0;
		if( tag[2] != 0 )
		{
			tag[0] = 2;
			tag[1] = 0xF8;
		}
	}
}
/**********************************************************************************************************
 * END TEC SPECIFIC CODE COMMENTED BLOCK
 **********************************************************************************************************/

// ---------------------------------------------------------------------------------
//		• ResizeOutputHandle
//
//	Grows a locked handle by resizeBy bytes and keeps a pointer into the handle
//	up to date in case the block moves while it is being resized.
//	The convert methods call this routine when they run out of buffer
//	space and need to make room for more output.
//
--------------------------------------------------------------------------------- void ResizeOutputHandle( Handle h, char** outputPtr, ByteCount resizeBy ); void ResizeOutputHandle( Handle h, char** outputPtr, ByteCount resizeBy ) { UInt32 oldSize=(UInt32)(*outputPtr) - (UInt32)(*h); ::HUnlock( h); ::SetHandleSize( h, GetHandleSize( h) + resizeBy); ThrowIfMemError_(); ::HLock( h); *outputPtr=(char*)( (UInt32)(*h) + oldSize); } // --------------------------------------------------------------------------------- // • ConvertFromMulti // // This method will convert Styled Text into a stream of Unicode. Input is // a text handle and a style record handle. Based on each one of the runs // found in the style handle, a conversion will take place from the appropriate // text encoding derived from the script run to unicode. Offsets into the // unicode stream will be kept so on output, we'll have a handle with a stream // of unicode ( converted to hex - since we can't display Unicode directly // yet :-) and a offset array indicating were in the unicode stream the original // script runs from the styled text occur. // --------------------------------------------------------------------------------- OSStatus CPPStarterApp::ConvertFromMulti( TextEncoding unicodeEncoding, const Handle fromHexText, TEStyleHandle theStyle, UInt32 fromLen, Handle &toHexText, ByteCount &inputRead, ByteCount &outputLen, ByteCount &toLen, ByteOffset** &theOffsets, UInt32 toUnicodeFlags) { TextToUnicodeInfo textToUnicodeInfo; OSStatus status=noErr; char* input; char* output; UnicodeMapping mapping; ByteCount origOutputLen; inputRead=0; outputLen=0; toLen=0; UInt16 noRuns=(**theStyle).nRuns; //Create an array of offsets that is the same size as the number of runs in the styled text //This offset array will be used by the caller to show how each run in the style run matches //the Unicode stream. 
theOffsets=(ByteOffset**)NewHandle( noRuns * sizeof(ByteOffset) ); if( theOffsets == NULL ) { status=MemError(); return status; } /* Resize our output buffer if necessary to the minimum buffer we recommended earlier*/ outputLen=::GetHandleSize( toHexText ); if( outputLen < kMinimumBufferSize ) { outputLen=kMinimumBufferSize; ::SetHandleSize(toHexText, outputLen); } origOutputLen=outputLen; ::HLock( fromHexText); input=*fromHexText; ::HLock( toHexText); output=*toHexText; //Fill out the unicode mapping structure with the appropriate version of Unicode ( UTF-8 or UTF-16) mapping.unicodeEncoding = unicodeEncoding; mapping.mappingVersion = kUnicodeUseLatestMapping; //loop for every one of the runs in our styled text and convert each run to Unicode int i; for(i=0; i<noRuns && status==noErr; i++) { Str31 fontName; unsigned char tag[4]; short index=(**theStyle).runs[i].styleIndex; /********************************************************************************************************** * TEC SPECIFIC CODE COMMENT * * In order to get an encoding for the specific run in the Styled Text, we call UpgradeScriptInfoToTextEncoding * with the font name only. This is all we need to get an encoding. We'll create our conversion context * based on this encoding **********************************************************************************************************/ ::GetFontName( (*((**theStyle).styleTab))[index].stFont, fontName ); /* If GetFontName fails by returning an empty font name ( possibly due to a bug in the Font itself ), then call FontToScript and use the Script to derive the Text Encoding. 
I've seen this bug occur on Fonts present in older versions of the Hebrew Language Kit */ if( *fontName ) status=::UpgradeScriptInfoToTextEncoding( kTextScriptDontCare, kTextRegionDontCare, kTextLanguageDontCare, fontName, &mapping.otherEncoding); else status=::UpgradeScriptInfoToTextEncoding( ::FontToScript((*((**theStyle).styleTab))[index].stFont), kTextRegionDontCare, kTextLanguageDontCare, NULL, &mapping.otherEncoding); if( status ) break; status=::CreateTextToUnicodeInfo( &mapping, &textToUnicodeInfo ); /********************************************************************************************************** * END TEC SPECIFIC CODE COMMENTED BLOCK **********************************************************************************************************/ if( !status ) { ByteCount oSourceRead=0; ByteCount oUnicodeLen=0; //assertain the input buffer length from the size of the style run ByteCount inputLen= ( i == noRuns-1 ? fromLen : (**theStyle).runs[i+1].startChar ) - (**theStyle).runs[i].startChar ; //figure out if we need a language tag. If so, insert it in the output stream. NeedsTag( tag, mapping.otherEncoding, unicodeEncoding ); if( *tag ) { if(*tag > outputLen ) { ResizeOutputHandle( toHexText, &output, origOutputLen ); outputLen+=origOutputLen; } ::BlockMove( tag+1, output, *tag ); output+=*tag; outputLen-=*tag; } char* oInput=input; char* oOutput=output; /********************************************************************************************************** * TEC SPECIFIC CODE COMMENT * * Call ConvertFromTextToUnicode in a loop in the event that our output buffer is too small. Each time * the call tells us that the buffer is too small, we just make the output buffer bigger by as much as it * originally was. 
**********************************************************************************************************/ do{ ByteCount tSourceRead, tUnicodeLen; status=::ConvertFromTextToUnicode( textToUnicodeInfo, inputLen, oInput, toUnicodeFlags, 0, NULL, NULL, NULL, outputLen, &tSourceRead, &tUnicodeLen, (UniChar*)oOutput); if( status == kTECOutputBufferFullStatus || status == kTECBufferBelowMinimumSizeErr ) { //Increase the size of the output buffer and update our input and output pointers ResizeOutputHandle( toHexText, &oOutput, origOutputLen ); oOutput=(char*)( (UInt32)oOutput + tUnicodeLen); outputLen=origOutputLen+(outputLen-tUnicodeLen); oInput=(char*)( (UInt32)oInput + tSourceRead); inputLen-=tSourceRead; } else outputLen-=tUnicodeLen; oSourceRead+=tSourceRead; oUnicodeLen+=tUnicodeLen; }while( status == kTECOutputBufferFullStatus || status == kTECBufferBelowMinimumSizeErr ); /********************************************************************************************************** * END TEC SPECIFIC CODE COMMENTED BLOCK **********************************************************************************************************/ //Now we have processed an entire run of text, so update our offsets accordingly if( status == noErr || status == kTECUsedFallbacksStatus) { (*theOffsets)[i]=toLen; inputRead+=oSourceRead; toLen+=oUnicodeLen; if( *tag ) toLen+=*tag; input+=oSourceRead; output+=oUnicodeLen; status=noErr; } ::DisposeTextToUnicodeInfo( &textToUnicodeInfo ); } } ::HUnlock( (Handle)toHexText ); ::SetHandleSize( (Handle)toHexText, toLen ); outputLen=toLen; //This routine always shows the Unicode output stream in Hex, so go ahead an convert it if( toLen > 0 ) { Handle theHexText=::NewHandle(toLen*2); ByteCount destLen; if( theHexText ) { HLock( toHexText); HLock( theHexText); BufToHex((StringPtr)*toHexText, (StringPtr)*theHexText, toLen, destLen, 0); HUnlock( theHexText); } ::DisposeHandle( toHexText ); toHexText=theHexText; toLen*=2; ::HUnlock( fromHexText ); 
for(i=i-1; i>=0; i--) (*theOffsets)[i]*=2; } return status; } /********************************************************************************************************** * TEC SPECIFIC CODE COMMENT * * This code tests to see if the encoding is Unicode. The Block from 0x100 to 0x1FF has been currently * reserved for Unicode encodings. In the future there is a small possibility that this might change. * If you require this type of functionality in your Apps, please notify us so that we may provide a * suitable API for you to use. **********************************************************************************************************/ Boolean CPPStarterApp::IsUnicode( TextEncoding encoding ) { TextEncoding encodingBase=::GetTextEncodingBase( encoding ); if( encodingBase >= kTextEncodingUnicodeDefault && encodingBase <= 0x01FF ) return true; return false; } /********************************************************************************************************** * END TEC SPECIFIC CODE COMMENTED BLOCK **********************************************************************************************************/ // --------------------------------------------------------------------------------- // • ResolveConversionParams // // This method turns a selection from our pop up menus into the appropriate // text encodings. Later it determines which converter to use to perform the // conversion. Either the Unicode Converter or the Text Encoding Converter. // Remember, this sample application code show cases the Unicode Converter. // Hence, it tries to do as much as it can in the Unicode Converter. This might // not be wise in your own applications as the Text Encoding Converter might // be able to perform a conversion more efficiently than the Unicode Converter. // Especially if there exists a plugin that can go from encoding x to encoding y // directly. The Unicode Converter on the other hand has to use Unicode as // a hub. 
Wouldn't you rather get a direct flight to your destination? :-) // --------------------------------------------------------------------------------- void CPPStarterApp::ResolveConversionParams( UInt32 fromEncodingIndex, TextEncoding *fromTextEncoding, UInt32 toEncodingIndex, TextEncoding *toTextEncoding, int *converterToUse ) { OSStatus status; UnicodeMapping findMapping; UnicodeMapping foundMapping; ItemCount oActualCount; findMapping.mappingVersion=kUnicodeUseLatestMapping; if( fromTextEncoding ) { if( fromEncodingIndex != 0 ) *fromTextEncoding = gTheApp->mAvailableMappings[ fromEncodingIndex-1 ]; else *fromTextEncoding=kTextEncodingMultiRun; } if( toTextEncoding ) { if( toEncodingIndex != 0 ) *toTextEncoding = gTheApp->mAvailableMappings[ toEncodingIndex-1 ]; else *toTextEncoding=kTextEncodingMultiRun; } if( converterToUse == NULL || fromTextEncoding == NULL || toTextEncoding == NULL) return; if( *fromTextEncoding == *toTextEncoding ) { *converterToUse = kIllegalConverter; return; } if( IsUnicode( *fromTextEncoding ) ) //if the from encoding is unicode but not UTF7 then return unicode converter { //otherwise return the high level converter if( ( ::GetTextEncodingFormat( *fromTextEncoding ) == kUnicodeUTF7Format ) || IsUnicode( *toTextEncoding) ) *converterToUse = kHighLevelConverter; else { *converterToUse = kUnicodeConverter; //Find out if the Unicode Converter can handle the destination encoding if( *toTextEncoding != kTextEncodingMultiRun ) { findMapping.unicodeEncoding=*fromTextEncoding; findMapping.otherEncoding=*toTextEncoding; status = ::QueryUnicodeMappings(kUnicodeMatchOtherBaseMask | kUnicodeMatchOtherVariantMask | kUnicodeMatchOtherFormatMask, &findMapping, 1, &oActualCount, &foundMapping); if( ( status != noErr && status != kTECArrayFullErr ) || ( oActualCount == 0 ) ) { *converterToUse = kHighLevelConverter; return; } } } } else if( IsUnicode( *toTextEncoding) ) //if the from encoding is unicode but not UTF7 then return unicode converter { 
//otherwise return the high level converter if( ( GetTextEncodingFormat( *toTextEncoding ) == kUnicodeUTF7Format ) || IsUnicode( *fromTextEncoding) ) *converterToUse = kHighLevelConverter; else { *converterToUse = kUnicodeConverter; //Find out if the Unicode Converter can handle the destination encoding if( *fromTextEncoding != kTextEncodingMultiRun ) { findMapping.unicodeEncoding=*toTextEncoding; findMapping.otherEncoding=*fromTextEncoding; status = ::QueryUnicodeMappings(kUnicodeMatchOtherBaseMask | kUnicodeMatchOtherVariantMask | kUnicodeMatchOtherFormatMask, &findMapping, 1, &oActualCount, &foundMapping); if( ( status != noErr && status != kTECArrayFullErr ) || ( oActualCount == 0 ) ) { *converterToUse = kHighLevelConverter; return; } } } } else { //We've determined that neither encoding is in Unicode so go ahead an determine if we can use the Unicode Converter //as a Hub ( fromEncoding->Unicode->toEncoding ). //If either encoding is a MultiRun, then we can't use either converter to do the job since the Unicode converter //can only convert to/from Unicode and we have already determined that neither encoding is unicode. The high //level on the other hand supports MultiRun conversions but only from Unicode and this is already handled //by the Unicode Converter. 
if( ( *fromTextEncoding == kTextEncodingMultiRun ) || ( *toTextEncoding == kTextEncodingMultiRun ) ) { *converterToUse = kIllegalConverter; return; } findMapping.unicodeEncoding=kTextEncodingUnicodeDefault; findMapping.otherEncoding=*fromTextEncoding; status = ::QueryUnicodeMappings(kUnicodeMatchOtherBaseMask | kUnicodeMatchOtherVariantMask | kUnicodeMatchOtherFormatMask, &findMapping, 1, &oActualCount, &foundMapping); if( ( status != noErr && status != kTECArrayFullErr ) || ( oActualCount == 0 ) ) { *converterToUse = kHighLevelConverter; return; } findMapping.otherEncoding=*toTextEncoding; status = ::QueryUnicodeMappings(kUnicodeMatchOtherBaseMask | kUnicodeMatchOtherVariantMask | kUnicodeMatchOtherFormatMask, &findMapping, 1, &oActualCount, &foundMapping); if( ( status != noErr && status != kTECArrayFullErr ) || ( oActualCount == 0 ) ) { *converterToUse = kHighLevelConverter; return; } *converterToUse = kUnicodeConverter; } } // --------------------------------------------------------------------------------- // • DoConvertToUnicode // Converts a text stream from a specified encoding to Unicode // --------------------------------------------------------------------------------- OSStatus CPPStarterApp::DoConvertToUnicode( TextEncoding unicodeEncoding, UInt32 toUnicodeFlags, TextEncoding fromEncoding, Handle srcH, ByteCount srcLen, Handle destH, ByteCount &inputRead, ByteCount &unicodeLen, ByteCount maxOutput) { OSStatus status; TextToUnicodeInfo textToUnicodeInfo; UnicodeMapping theMapping; ByteCount origOutputLen; inputRead=0; unicodeLen=0; /* Resize our output buffer if necessary to the minimum buffer we recommended earlier*/ if( maxOutput < kMinimumBufferSize ) { maxOutput=kMinimumBufferSize; ::HUnlock(destH); ::SetHandleSize(destH, maxOutput); ::HLock(destH); } origOutputLen=maxOutput; char *src=*srcH; char *dest=*destH; /* Create a unicodeMapping holding the specified source encoding and the target unicode encoding */ theMapping.unicodeEncoding = 
unicodeEncoding; theMapping.otherEncoding = fromEncoding; theMapping.mappingVersion = kUnicodeUseLatestMapping; /* Create a conversion context from the specified mapping */ status=::CreateTextToUnicodeInfo( &theMapping, &textToUnicodeInfo); if( status ) Throw_(status); /* Call ConvertFromTextToUnicode as many times as necessary to convert the whole input stream to unicode. The output handle will get resized if necessary to accomodate the conversion of the whole input stream */ do{ ByteCount tSourceRead, tUnicodeLen; status = ::ConvertFromTextToUnicode(textToUnicodeInfo, srcLen, (ConstLogicalAddress) src, toUnicodeFlags, 0, nil, nil, nil, maxOutput, &tSourceRead, &tUnicodeLen, (UniCharArrayPtr)dest); //check to see if we need to adjust our output buffer size. See discussion of this in ConvertFromMulti method. if( status == kTECOutputBufferFullStatus || status == kTECBufferBelowMinimumSizeErr ) { ResizeOutputHandle( destH, &dest, origOutputLen ); dest=(char*)( (UInt32)dest + tUnicodeLen); maxOutput=origOutputLen+(maxOutput-tUnicodeLen); src=(char*)( (UInt32)src + tSourceRead); srcLen-=tSourceRead; } inputRead+=tSourceRead; unicodeLen+=tUnicodeLen; }while( status == kTECOutputBufferFullStatus || status == kTECBufferBelowMinimumSizeErr ); ::DisposeTextToUnicodeInfo( &textToUnicodeInfo ); return status; } // --------------------------------------------------------------------------------- // • DoConvertToMultiple // Attempts to convert the input Unicode stream to a text stream composed // of every single Mac OS Encoding ( derived from the Script ) that is // installed in the machine ( via Language Kits ). 
// --------------------------------------------------------------------------------- OSStatus CPPStarterApp::DoConvertToMultiple( TextEncoding unicodeEncoding, UInt32 fromUnicodeFlags, TextEncodingRunHdl &theRuns, Handle srcH, ByteCount srcLen, Handle destH, ByteCount &inputRead, ByteCount &outputLen, ByteCount maxOutput) { OSStatus status; UnicodeToTextRunInfo unicodeToTextRunInfo; ItemCount textEncodingRuns; ItemCount oTextEncodingRuns=0; UnicodeMapping mapping={0}; TextEncodingRunPtr theRunsPtr; ByteCount origOutputLen; inputRead=0; outputLen=0; /* Resize our output buffer if necessary to the minimum buffer we recommended earlier*/ if( maxOutput < kMinimumBufferSize ) { maxOutput=kMinimumBufferSize; ::HUnlock(destH); ::SetHandleSize(destH, maxOutput); ::HLock(destH); } origOutputLen=maxOutput; char *src=*srcH; char *dest=*destH; /* Create a conversion context that maps from the specified version of unicode to every encoding that is currently installed in the system */ mapping.unicodeEncoding=unicodeEncoding; status=::CreateUnicodeToTextRunInfo( 0, &mapping, &unicodeToTextRunInfo ); if( status ) Throw_(status); /* Create a handle to hold the text encoding runs that will be derived from converting the Unicode stream into a set of Mac OS encoding runs. Since I don't know what kind of text I'm dealing with, but since this application is written to deal with small amounts of text, I approximately allocate space for 1 run per every 20 characters of input. If this is insufficient, I will adjust the size of the handle. This is all in the code below. For your own Apps this allocation scheme will surely be flawed. Especially if you are dealing with a lot of text. */ textEncodingRuns=( srcLen > 20 ? srcLen/10 : (srcLen > 10 ? 
srcLen/5 : 2) ); theRuns = (TextEncodingRunHdl)::NewHandle( sizeof(TextEncodingRun)*textEncodingRuns ); if( theRuns == 0 ) Throw_(MemError()); ::HLock( (Handle)theRuns ); theRunsPtr=*theRuns; Boolean ranOutOfSpace=false; do{ ByteCount tSourceRead, tUnicodeLen; status = ::ConvertFromUnicodeToTextRun(unicodeToTextRunInfo, srcLen, (ConstUniCharArrayPtr) src, fromUnicodeFlags, 0, nil, nil, nil, maxOutput, &tSourceRead, &tUnicodeLen, (LogicalAddress)dest, textEncodingRuns, &oTextEncodingRuns, theRunsPtr); /********************************************************************************************************** * TEC SPECIFIC CODE COMMENT * * Here we handle a bit more complex case of kTECOutputBufferFullStatus than in the ConvertFromMulti method * above. The reason being that we need to check if we ran out of encoding run space and of just plain buffer * space. Both cases are handled below. **********************************************************************************************************/ ranOutOfSpace = (status == kTECOutputBufferFullStatus || status == kTECBufferBelowMinimumSizeErr || status == kTECArrayFullErr); if( ranOutOfSpace ) { //check to see if we ran out of encoding run buffer. I know that if I get this error is the encoding run //buffer because I did not make use of the offset array parameters in ConvertFromUnicodeToTextRun. Had //I done so, then I would have to check if I ran of offset buffer space, text encoding run buffer space, //or both. if( status == kTECArrayFullErr ) { ResizeOutputHandle( (Handle)theRuns, &((char*)theRunsPtr), (textEncodingRuns*sizeof(TextEncodingRun)) ); maxOutput-=tUnicodeLen; } else //it must have been that we ran out of buffer space. NOTE, there is the possibility that { //we run out of both output buffer and encoding run buffer space. 
That is unlikely, but if //it does happen to be the case, when we execute ConvertFromUnicodeToTextRun it will again //return a status error indicating that not enough buffer space for one or the other is //available, at that point we'll just adjust it. ResizeOutputHandle( destH, &dest, origOutputLen ); maxOutput=origOutputLen+(maxOutput-tUnicodeLen); } dest=(char*)( (UInt32)dest + tUnicodeLen); src=(char*)( (UInt32)src + tSourceRead); srcLen-=tSourceRead; } //Adjust our encoding run array if this is not the first time we've gone through the loop. We //need to do this because ConvertFromUnicodeToTextRun encoding run offsets are zero based from //the output stream it produces. The second through nth time we call ConvertFromUnicodeToTextRun, //the input/output streams are no longer zero based as far as the original caller to DoConvertToMultiple //is concerned, so we need to adjust those offsets accordingly. if( outputLen != 0 ) { for( int runIndex=0; runIndex<oTextEncodingRuns; runIndex++) (theRunsPtr[runIndex]).offset+=outputLen; } theRunsPtr+=oTextEncodingRuns; inputRead+=tSourceRead; outputLen+=tUnicodeLen; }while( ranOutOfSpace ); /********************************************************************************************************** * END TEC SPECIFIC CODE COMMENTED BLOCK **********************************************************************************************************/ ::HUnlock( (Handle)theRuns ); ::SetHandleSize( (Handle)theRuns, ::GetHandleSize( (Handle)theRuns) - ( (textEncodingRuns-oTextEncodingRuns)*sizeof(TextEncodingRun) ) ); ::DisposeUnicodeToTextRunInfo( &unicodeToTextRunInfo ); return status; } // --------------------------------------------------------------------------------- // • DoConvertToEncoding // Converts a Unicode stream to a stream of text in the specified encoding // --------------------------------------------------------------------------------- OSStatus CPPStarterApp::DoConvertToEncoding( TextEncoding unicodeEncoding, UInt32 
fromUnicodeFlags, TextEncoding toEncoding, Handle srcH, ByteCount srcLen, Handle destH, ByteCount &inputRead, ByteCount &outputLen, ByteCount maxOutput) { OSStatus status; UnicodeToTextInfo unicodeToTextInfo; UnicodeMapping theMapping; ByteCount origOutputLen; inputRead=0; outputLen=0; /* Resize our output buffer if necessary to the minimum buffer we recommended earlier*/ if( maxOutput < kMinimumBufferSize ) { maxOutput=kMinimumBufferSize; ::HUnlock(destH); ::SetHandleSize(destH, maxOutput); ::HLock(destH); } origOutputLen=maxOutput; char *src=*srcH; char *dest=*destH; /* Create a unicodeMapping holding the specified source encoding and the target unicode encoding */ theMapping.unicodeEncoding = unicodeEncoding; theMapping.otherEncoding = toEncoding; theMapping.mappingVersion = kUnicodeUseLatestMapping; /* Create a conversion context from the specified mapping */ status=::CreateUnicodeToTextInfo( &theMapping, &unicodeToTextInfo); if( status ) Throw_(status); /* Call ConvertFromUnicodeToText as many times as necessary to convert the whole unicode stream. The output handle will get resized if necessary to accomodate the conversion of the whole input stream */ do{ ByteCount tSourceRead, tUnicodeLen; status = ::ConvertFromUnicodeToText(unicodeToTextInfo, srcLen, (ConstUniCharArrayPtr) src, fromUnicodeFlags, 0, nil, nil, nil, maxOutput, &tSourceRead, &tUnicodeLen, (LogicalAddress)dest); //check to see if we need to adjust our output buffer size. See discussion of this in ConvertFromMulti method. 
if( status == kTECOutputBufferFullStatus || status == kTECBufferBelowMinimumSizeErr ) { ResizeOutputHandle( destH, &dest, origOutputLen ); dest=(char*)( (UInt32)dest + tUnicodeLen); maxOutput=origOutputLen+(maxOutput-tUnicodeLen); src=(char*)( (UInt32)src + tSourceRead); srcLen-=tSourceRead; } inputRead+=tSourceRead; outputLen+=tUnicodeLen; }while( status == kTECOutputBufferFullStatus || status == kTECBufferBelowMinimumSizeErr ); ::DisposeUnicodeToTextInfo( &unicodeToTextInfo ); return status; } // --------------------------------------------------------------------------------- // • DoConvertUsingHLC // Converts a text stream from one encoding to another using the Text Encoding // Converter. // --------------------------------------------------------------------------------- OSStatus CPPStarterApp::DoConvertUsingHLC( TextEncoding fromTextEncoding, Handle fromTextH, ByteCount fromTextLen, TextEncoding toTextEncoding, Handle toTextH, ByteCount toTextLen, ByteCount &inputRead, ByteCount &outputLen ) { OSStatus status; TECObjectRef newEncodingConverter; ByteCount origOutputLen; Boolean needsToFlush=true; inputRead=0; outputLen=0; if( fromTextLen == 0 ) return noErr; /* Resize our output buffer if necessary to the minimum buffer we recommended earlier*/ if( toTextLen < kMinimumBufferSize ) { toTextLen=kMinimumBufferSize; ::HUnlock(toTextH); ::SetHandleSize(toTextH, toTextLen); ::HLock(toTextH); } origOutputLen=toTextLen; char *fromText=*fromTextH; char *toText=*toTextH; /* Create a conversion object based on the source and destination encodings */ status = ::TECCreateConverter(&newEncodingConverter, fromTextEncoding, toTextEncoding); if( status ) Throw_(status); /********************************************************************************************************** * TEC SPECIFIC CODE COMMENT * * Converting text using the Text Encoding Converter is no different than using the Unicode Converter. * However, there is one thing to watch out for. 
That is, some converters may have some output left over * even when there is no more input to process. So instead of calling TECConvertText, we call TECFlushText * to extract any other text that converter plugin may have in it's internal buffers. **********************************************************************************************************/ do{ ByteCount tSourceRead, tUnicodeLen; /* If fromTextLen > 0 then we have some characters in the input stream which haven't been processed by the converter, so call TECConvertText. Otherwise, we've exhausted the input stream, so we just need to flush out the rest of the converted stream from TEC's internal buffers. Hence, we call TECFlushText. TECFlushText will be called at least once. Could be called more depending on how much buffer space is left */ if( fromTextLen > 0 ) status = ::TECConvertText( newEncodingConverter, (ConstTextPtr) fromText, fromTextLen, &tSourceRead, (TextPtr)toText, toTextLen, &tUnicodeLen); else { status=::TECFlushText(newEncodingConverter, (TextPtr) toText, toTextLen, &tUnicodeLen); needsToFlush=false; } if( fromTextLen > 0 ) //only adjust inputRead if not being called from TECFlushText inputRead+=tSourceRead; outputLen+=tUnicodeLen; //check to see if we need to adjust our output buffer size. This adjustment is no different from //what we have seen in the Convert methods above. We just take into consideration that we might //be getting called from a TECFlushText so no adjustments need to be made for the input buffer //or input length. if( status == kTECOutputBufferFullStatus || status == kTECBufferBelowMinimumSizeErr ) { ResizeOutputHandle( toTextH, &toText, origOutputLen ); toTextLen=origOutputLen+(toTextLen-tUnicodeLen); //It could happen that we set the needsToFlush flag to false prematurely, so reset it. 
if(needsToFlush == false ) needsToFlush=true; } else toTextLen-=tUnicodeLen; toText=(char*)( (UInt32)toText + tUnicodeLen); if( fromTextLen > 0 ) //only adjust src/fromTextLen if not being called from TECFlushText { fromText=(char*)( (UInt32)fromText + tSourceRead); fromTextLen-=tSourceRead; } }while( status == kTECOutputBufferFullStatus || status == kTECBufferBelowMinimumSizeErr || needsToFlush); /********************************************************************************************************** * END TEC SPECIFIC CODE COMMENTED BLOCK **********************************************************************************************************/ ::TECDisposeConverter( newEncodingConverter); return status; } // --------------------------------------------------------------------------------- // • DoConvert // // This method gets called when a conversion needs to take place that isn't // Styled Text in origin. It does not contain much Text Encoding Converter // related features in it. It is just a hub that serves to determine which // conversion needs to take place. 
// --------------------------------------------------------------------------------- OSStatus CPPStarterApp::DoConvert( const Handle fromHexText, UInt32 fromLen, UInt32 fromEncoding, Handle toHexText, ByteCount &inputRead, ByteCount &outputLen, ByteCount &toLen, UInt32 toEncoding, TextEncodingRunHdl &theRuns, UInt32 fromUnicodeFlags, UInt32 toUnicodeFlags, Boolean wantHexOutput) { volatile OSStatus status=noErr; volatile Handle localFromText=0L; volatile Handle localToText=0L; EDebugAction lDebugThrow=gDebugThrow; EDebugAction lDebugSignal=gDebugSignal; SetDebugThrow_(debugAction_Nothing); SetDebugSignal_(debugAction_Nothing); Try_ { Boolean legalHexStr; ByteCount toHexLen; ByteCount localFromLen; TextEncoding fromTextEncoding; TextEncoding toTextEncoding; int converterToUse; // Initialize some of our byte count output parameters inputRead = 0; outputLen = 0; toLen=0; //Create a local fromText and use it to hold the from string consisting of the //converted hex string localFromText=::NewHandle( fromLen*2 ); FailNIL_(localFromText); //Convert our string from Hex ::HLock( fromHexText ); ::HLock( localFromText ); legalHexStr=FromHexToString( (StringPtr)*fromHexText, fromLen, (StringPtr)*localFromText, localFromLen ); if( !legalHexStr ) { ::BlockMove( *fromHexText, *localFromText, fromLen ); localFromLen = fromLen; } ::HUnlock( localFromText ); ::SetHandleSize( localFromText, localFromLen ); ::HLock( localFromText ); ::HUnlock( fromHexText ); toHexLen = ::GetHandleSize(toHexText); // Determine if the Unicode Converter can handle the input and output encodings. If so, then use the Unicode // Converter as a Hub to perform the conversion. Otherwise, use the High Level Converter. 
ResolveConversionParams( fromEncoding, &fromTextEncoding, toEncoding, &toTextEncoding, &converterToUse ); if( converterToUse == kUnicodeConverter ) { //make a copy of the toText handle localToText = toHexText; ::HandToHand(&localToText); ThrowIfMemError_(); //Convert from encoding x to unicode -- if already unicode, just copy the string if( !IsUnicode(fromTextEncoding) ) { TextEncoding unicodeEncoding; ::HLock( localToText ); unicodeEncoding =IsUnicode(toTextEncoding) ? toTextEncoding : kTextEncodingUnicodeDefault; status=DoConvertToUnicode( unicodeEncoding, toUnicodeFlags, fromTextEncoding, localFromText, localFromLen, localToText, inputRead, outputLen, ::GetHandleSize(localToText)); fromTextEncoding = unicodeEncoding; } else { ::HLock( localToText ); ::BlockMove( *localFromText, *localToText, localFromLen ); inputRead=0; outputLen=localFromLen; } if(status && status!=kTECUsedFallbacksStatus) Throw_( status ); //Convert from unicode to encoding x -- if already encoding x, just copy the string if( !IsUnicode( toTextEncoding) ) { ::HLock( toHexText ); if( toTextEncoding != kTextEncodingMultiRun ) status=DoConvertToEncoding( fromTextEncoding, fromUnicodeFlags, toTextEncoding, localToText, outputLen, toHexText, inputRead, outputLen, toHexLen); else status=DoConvertToMultiple( fromTextEncoding, fromUnicodeFlags, theRuns, localToText, outputLen, toHexText, inputRead, outputLen, toHexLen); ::BlockMove( *toHexText, *localToText, outputLen ); toHexLen = ::GetHandleSize(toHexText); //toHextText's size might have changed during the conversion process } } else if( converterToUse == kHighLevelConverter ) { //make a copy of the toText handle localToText = toHexText; ::HandToHand(&localToText); ThrowIfMemError_(); ::HLock( localToText ); status=DoConvertUsingHLC( fromTextEncoding, localFromText, localFromLen, toTextEncoding, localToText, ::GetHandleSize(localToText), inputRead, outputLen); } else // converterToUse == kIllegalConverter { ::SetHandleSize( localToText, 0 ); 
outputLen=0; inputRead=0; status=paramErr; ::ParamText("\pConversion requested is illegal or cannot be handled by this App","\p","\p","\p"); ::Alert(kSimpleAlert, NULL); } if( wantHexOutput ) { //When converting to Unicode, we will be converting the string to hex so that the hex representation //for 2 bytes is together plus a space separating it from the next 2 byte sequence. When converting //to anything else, we will put a space between every byte. //Therefore, we need to figure out if we have enough space in our destination buffer. UInt8 clumpSize = (IsUnicode(toTextEncoding)) ? 2 : 1; UInt32 estOutputLen = outputLen*2 + outputLen/clumpSize + outputLen%clumpSize ; if( toHexLen < estOutputLen ) { ::HUnlock( toHexText ); SetHandleSize( toHexText, estOutputLen ); ThrowIfMemError_(); ::HLock( toHexText ); } BufToHex( (StringPtr)*localToText, (StringPtr)*toHexText, outputLen, toLen, clumpSize ); //Delete the runs handle since they don't make any sense in Hex if( theRuns ) { ::DisposeHandle( (Handle)theRuns ); theRuns=0; } } else { //resize the handle in the event that our outputLen is bigger or smaller ::HUnlock( toHexText ); ::SetHandleSize( toHexText, outputLen ); ::HLock( toHexText ); ::BlockMove( *localToText, *toHexText, outputLen ); toLen=outputLen; } if(status && status!=kTECUsedFallbacksStatus) Throw_(status); } Catch_(xErr) { ::SetHandleSize( toHexText, toLen ); ::SysBeep(0); status = xErr; } EndCatch_ if( localFromText ) ::DisposeHandle(localFromText); if( localToText ) ::DisposeHandle(localToText); ::HUnlock( toHexText ); SetDebugThrow_(lDebugThrow); SetDebugSignal_(lDebugSignal); return status; }